; Vector3: X grows right, Y down, Z forward.
; On the FP stack and in memory it looks like {Y X Z} (sometimes I need only Y).

; Program entry (flat image loaded at offset 100h). The first four bytes are
; data words that are ALSO executed as instructions, so they serve both as
; constants and as the code that sets up AX for the mode-set call below.
org 100h ; assume al=0 bx=0 sp=di=-2 si=0100h bp=09??h
C19  dw 19                 ;=13h 00h  executes as: adc ax,[bx+si]  (word 19 = 0013h)
C196 dw 196    ; ax=13h    ;=C4h 00h  executes as: les ax,[bx+si]  (ax <- word[100h] = 13h)
                           ; the les also loads ES from word[102h]; ES is overwritten at M below
RO_Z equ $-1-4 ; ray_origin.z = about -512.3 (don't care about LSbyte)
               ; = offset 0FFh: a float overlapping the bytes ?? 13 00 C4, i.e. 0xC40013??

; S(x): address a constant x through SI with a byte-sized displacement
; (relies on si=100h, as assumed at entry and preserved by every pusha/popa pair).
%define S(x) [byte x + si-100h]

P:int 10h      ; video mode, default palette (ax=13h was produced by the two data words above)
DI_ equ -2     ; pixel_adr@di = -2 (-3 would be correct, but l/r edges are the same)

;Each frame: the visible pixels are A0000..AF9FF, I want X=0 Y=0 in the center
; M is the top of the frame loop (re-entered from the key check after each frame).
; The same value 0x9fca is used twice: as the initial high word of the dx:bx
; pixel coordinate, and as the segment for stosb.
; NOTE(review): with ES=0x9fca the 64K stosb window starts at linear 0x9FCA0,
; i.e. 0x360 bytes below A0000 -- confirm that writing there is acceptable.
M:mov dx,0xA000-10-20-20-4 ;=0x9fca
  mov es,dx    ; dx:bx=YX:XX = 0x9fca:0

;Generate gem normals to p0..p19=[bp+200h,300h,...].
; Runs once per frame. For i = 19 down to 1 a start vector is built from i and
; then rotated 31 times by angles derived from the frame counter T, so the
; plane set changes with T. Each p[i] is three floats {y,x,z} stored 100h bytes
; apart (bp is advanced by si=100h per plane; the data sits at [bp+si]).
; All registers are restored by the closing popa; only memory is changed.
  pusha  ; adr:   -18 -16 -14 -12 -10  -8  -6  -4  -2
         ; stack:  di  si  bp  sp  bx  dx  cx  ax   0
C3211:   ; data:   -2 100 9??  -2  0  9fca T  key
         ; C3211 labels the two bytes of the next instruction; read as a word
         ; they are used as the colour scale factor (fimul word S(C3211)).
  mov cx,[si]  ; i@cx = 19...1   (word[100h] = C19 = 19)
G:add bp,si    ; bp points to p[i]; carry=0
  pusha        ; inner frame: saves i (cx) and the advanced bp for this plane
  fninit       ; clear FP stack
  fldln2
N:fchs
  loop N             ;|z=0.693*(-1^i)   ; negated i times; leaves cx=0
  fild word[-6-16+di-DI_]  ; pushed i  (cx slot of the inner pusha frame, address -22)
  fsincos            ;|y=cos(i) x=sin(i) z   ; len=1.2167

;Do a bunch of rotations.
; 31 passes; pass j divides T by the word found at [100h+j], i.e. it walks
; through the program's own first bytes and uses them as a divisor table.
  mov cl,31    ; j@bx=0..   (ch is already 0 because loop N ended with cx=0)
R:call STORE_BP      ;|y x z -> p[i]   (pops all three; they are reloaded from memory below)
  fld dword[bp+si+4] ;|x
  fild word[-6+di-DI_]     ; T = cx slot of the outer pusha frame (address -6)
  fidiv word[bx+si]  ;|t=T/[19,-15360,196,...][j]
  fsincos            ;|c=cos(t) s=sin(t) x
; S executes twice, steered only by the carry flag: CF=0 on entry, cmc makes it
; 1 (jump taken, second pass), the second cmc makes it 0 again (fall through).
; Stack comments: left column = first pass, right column = second pass.
S:fld dword[bp+si+8] ;|z c s x      ;|z sy c sz x
  fmul st2           ;|sz c s x     ;|cz sy c sz x
  fxch st2           ;|s c sz x     ;|c sy cz sz x
  fmul dword[bp+si]  ;|sy c sz x    ;|cy sy cz sz x
  cmc
; BIG is the dword formed by the 3 preceding code bytes (fmul dword[bp+si], cmc)
; plus the opcode byte of the following jc; it is the divisor that scales the
; pixel coordinates into a ray direction.
BIG equ $-3 ;=1928661720
  jc S         ; loop twice
  fsubp st3,st0      ;|sy cz sz-cy x
  faddp              ;|ynew=sy+cz xnew=sz-cy znew=x
  inc bx             ; next divisor byte offset (inc leaves CF untouched)
  loop R       ; a vector stays on the stack
               ; (p[i] in memory is the vector stored by the last STORE_BP call)
  popa         ; restore i in cx, bx=0, and bp -> p[i]
  loop G       ; next plane: bp advances by another 100h at G
  popa         ; restore the frame-level registers (bp, cx=T, dx, bx, di)

;Each pixel: cx=T dx:bx=YX:XX(init=9fca:0) di=adr(init=0)
; dx:bx is a 32-bit fixed-point screen coordinate that the end of the pixel loop
; advances by 0x0000CCCD per pixel; the high byte carries Y, the lower three
; bytes carry X. Entry X is taken when the low-word add carried (so dx must be
; incremented); entry X2 when it did not.
X:
  inc dx       ; part of "dx:bx += 0x0000CCCD"
X2:
  fninit       ; adr:     -18 -16 -14 -12 -10  -8  -6  -4  -2
  pusha        ; stack:    di  si  bp  sp  bx  dx  cx  ax   0
  xor di,di    ; s16:  pixadr 100 9??  -2  ..X..Y  T result
               ; di=0 so that [di-N] addresses the frame just pushed;
               ; the real pixel address comes back with popa.

;Compute ray direction.
; Both coordinates are read as unaligned dwords straight out of the pusha frame:
;   [di-10] = bx,dx            -> dx:bx, dominated by Y
;   [di-11] = sp.hi,bx,dx.lo   -> the X part shifted up by one byte
; rd = { Y/BIG, X/BIG, 1 } is stored at [bp+si] (bp = frame-level value).
  fld1
  fild dword[di-11]
  fild dword S(BIG)
  fdiv st1,st0       ;|BIG X/BIG 1
  fidivr dword[di-10] ;|Y/BIG X/BIG 1   (st0 = int / st0)
  call STORE_BP      ;|Y/BIG X/BIG 1 -> rd

;Hit the gem. front_plane @ dx, back_plane @ gs
; Clips the ray against the 19 planes p[19..1]. tf is the largest entry
; distance found so far (front-facing planes), tb the smallest exit distance
; (back-facing planes); the planes that produced them are remembered in dx/gs.
; Leaves the FP stack as |tf tb and CF=1 for a hit (tf<tb after all planes),
; CF=0 for a miss (early exit as soon as tf>=tb).
; dx may be reused here because the pixel coordinate is saved in the pusha frame.
GEM_OUTER:
  fild dword[si]     ;|tfront=0 tback=HUGE=0xC40013   (dword at 100h read as an integer)
  fldz
  mov cx,[si]  ; i@cx = 19...1; bx points to p[i]
  lea bx,[bp+si]     ; plane data is at [bx+si]; bx advances by si=100h per plane

;Ray-plane intersection.
; The ray origin is (0,0,RO_Z), so (n|ro) reduces to n.z*RO_Z.
I:call DOT_BPBX      ;|D=(p[i].n|rd) tf tb
  ftst               ; compare D with 0
  fnstsw ax
  sahf         ; cf=1 if we're in front of the plane   (CF=1 <=> D<0)
               ; the x87 instructions below do not touch CPU flags, so CF
               ; survives until the jnc
  fld dword S(RO_Z)
  fmul dword[bx+si+8];|(p[i].n|ro) D tf tb
  fisubr word S(C196);|N=pd-(p[i].n|ro) D tf tb   ; pd=196
  fdivrp st1         ;|t=N/D tf tb
  jnc BACK
FRONT:
  fcom st1           ; t ? tf
  fnstsw ax
  sahf
  jb NEXT      ;if t>=tf { tf=t; fr@dx = current; }
  fst st1
  mov dx,bx
  jmp NEXT
BACK:
  fcom st2           ; t ? tb
  fnstsw ax
  sahf
  jnb NEXT     ;if t<tb { tb=t; bk@gs = current; }
  fst st2
  mov gs,bx
NEXT:
  fstp st0           ;|tf tb
  fcom               ; tf ? tb
  fnstsw ax
  sahf         ;if tf>=tb { no_hit: cf=0; early exit } else { cf=1 }
  jnb EXIT
  lea bx,[bx+si]; don't overwrite carry   (bx += 100h -> next plane, flags preserved)
  loop I
; Shading. On entry CF=1 means the ray hit the gem (tf<tb), CF=0 means a miss;
; the FP stack holds |tf tb. The resulting colour is multiplied by 3211 and
; written as a 16-bit integer into the saved-AX slot of the pusha frame, so the
; popa below returns it in ax together with the real di, bx, dx and cx.
EXIT:

  fld dword[bp+si]
  fabs               ;|bgd_color=abs(rd.y)  (actually rd.y^2 with gamma)
  jnc D        ; no hit
  mov bx,gs    ; carry=1
; C executes twice, steered by the carry flag: first with bx = back plane (gs),
; then with bx = front plane (dx). Each pass pushes one squared term.
; Left comment column = first pass, right column = second pass.
C:call DOT_BPBX      ;|(rd|bk.n)            ;|(rd|fr.n)
  fmul dword[bx+si]  ; * plane.y
  fsubr dword[bp+si] ;|rd.y-(rd|bk.n)*bk.y  ;|rd.y-(rd|fr.n)*fr.y   (sign is irrelevant: squared next)
  fmul st0           ;|bkC                  ;|frC bkC
  mov bx,dx
  cmc
  jnc C        ; loop twice
  fadd st0           ;|frC*2 bkC
  faddp              ;|gem_color=frC*2+bkC
  fsqrt        ; gamma correction
D:fimul word S(C3211)  ; scale by 3211 (the code bytes labelled C3211)
  ;fimul word[si]
  fistp word[di-4] ; color*3211 -> pushed ax   (di=0 here, so this is address -4)
  popa

; 4-bit builtin gray palette with cheap dithering.
; ax = colour*3211: ah is the intensity level, al the fractional part.
; The fraction is compared against bl (low byte of the X coordinate) and the
; resulting carry rounds the level up for some pixels. Levels 0..15 map to
; palette indices 0x10..0x1F; anything that overflows is clamped to 0x1F.
  cmp bl,al    ; CF=1 if bl<al (unsigned): dither threshold
  mov al,ah
  adc al,0xF0  ; al = level+0xF0+CF; carry out <=> level+CF >= 16
  jnc O
  salc         ; overflow: CF=1 here, so al=0xFF (-> 0x1F after the add)
O:add al,0x20  ; 0xF0..0xFF -> 0x10..0x1F
  ;add al,16

;; Fast version: draw each pixel twice.
;  stosb
;  add bx,0xCCCD ;dx:bx = YXX += 0000CCCD
;  adc dx,0

; Store the pixel and step to the next one. The 32-bit add is split: the low
; word is added here and the carry into dx is applied by "inc dx" at X.
; The loop ends when the add both carries and leaves bx=0.
  stosb        ; es:[di] = al, di++
  add bx,0xCCCD ;dx:bx = YXX += 0000CCCD
  jnc X2       ; no carry into dx
  jnz X        ;do 65536 iterations

; End of frame: advance time and poll the keyboard port.
  inc cx       ; T++
  in al,60h    ; last keyboard scancode
  dec al       ; zero only if the scancode was 1
  jnz M        ; fallthrough
               ; when not taken, execution runs into STORE_BP below and its ret
               ; pops the word at sp=-2 (shown as 0 in the stack diagrams), so
               ; control goes to offset 0 -- presumably the DOS PSP exit stub;
               ; NOTE(review): confirm for the target loader.

; STORE_BP: pop a 3-component vector from the FP stack into memory.
; In:    st0=y, st1=x, st2=z; bp+si = destination address
; Out:   dword[bp+si]=y, dword[bp+si+4]=x, dword[bp+si+8]=z (single precision)
; Clobb: FP stack depth -3; CPU registers and flags are untouched.
; Also reached by fallthrough from the key check at the end of the frame loop,
; where only the final ret matters.
STORE_BP: ; { a.y .x .z } --> a[bp]
  fstp dword[bp+si]
  fstp dword[bp+si+4]
  fstp dword[bp+si+8]
  ret

; DOT_BPBX: push the dot product of two 3-float vectors onto the FP stack.
; In:    a at [bp+si], b at [bx+si] (each three consecutive dwords)
; Out:   st0 = a[0]*b[0] + a[1]*b[1] + a[2]*b[2]; previous stack contents are
;        pushed down by one.
; Clobb: FP stack depth +1 (two slots used temporarily); CPU registers and
;        flags are untouched, which callers rely on to keep CF across the call.
DOT_BPBX:  ; a[bp] b[bx] --> { (a dot b).y .x .z }
  fld dword[bp+si]
  fmul dword[bx+si]
  fld dword[bp+si+4]
  fmul dword[bx+si+4]
  faddp
  fld dword[bp+si+8]
  fmul dword[bx+si+8]
  faddp
  ret
